# high confidence clouds or high confidence cloud shadows or fill values
mask_medconf <- function(x){
bs <- intToBits(x)
if ( ((bs[1]) | # cloud
(bs[3]) | # shadow
(bs[4]) | # snow
(bs[5]) | # water
(bs[6]) | # aerosol (only low quality)
(bs[8]) | # subzero
(bs[9]) | # saturation
(bs[10])) # High sun zenith flag
== 1){
return("flag") } else {
return("valid")
}
}
# due to the nature of the intToBits function it is not possible to perform
# this step in a mutate pipe. Therefor a vector is created via lapply,
# joined to then joined to the fs_LND df and finally flaged pixels can be removed
mask_idex <- lapply(raw_fs$QAI, mask_medconf) %>% unlist()
fs_LND_filtered <- raw_fs %>%
mutate(QAI = mask_idex) %>%
filter(QAI == "valid")# Katja´s method to filter QAI values
raw_fs_KK <- raw_fs
raw_fs_KK$qa_cloud <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 1), 3)
raw_fs_KK$qa_shadow <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 3), 1)
raw_fs_KK$qa_snow <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 4), 1)
raw_fs_KK$qa_water <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 5), 1)
raw_fs_KK$qa_aerosol <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 6), 3)
raw_fs_KK$qa_subzero <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 8), 1)
raw_fs_KK$qa_saturation <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 9), 1)
raw_fs_KK$qa_zenith <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 10), 1)
raw_fs_KK$qa_illumination <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 11), 3)
raw_fs_KK$qa_slope <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 13), 1)
raw_fs_KK$qa_vapor <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 14), 1)
raw_fs_KK$quality <- 0
raw_fs_KK$quality[raw_fs_KK$qa_cloud != 0 |raw_fs_KK$qa_snow != 0 |raw_fs_KK$qa_shadow != 0] <- 1
raw_fs_KK <- raw_fs_KK[raw_fs_KK$quality == 0, ]print(paste("number of observations following SHS method for QAI filtering:", round(nrow(fs_LND_filtered)/6)) )## [1] "number of observations following SHS method for QAI filtering: 79063"
print(paste("number of observations following KK method QAI filtering:", round(nrow(raw_fs_KK)/6)) )## [1] "number of observations following KK method QAI filtering: 74693"
print(paste("The difference in included observation:", round(nrow(fs_LND_filtered)/nrow(raw_fs_KK), digits = 2), "% (n=", (nrow(fs_LND_filtered)-nrow(raw_fs_KK)), ")") )## [1] "The difference in included observation: 1.06 % (n= 26220 )"
As seen above the differences in the filtering methods and parameterization is very marginal, at ~1% difference in amount of included observations
##Additional data filtering and auxilirary varaible and indicity computation
fs_LND <- fs_LND_filtered %>%
# reduce file size for testing
#sample_n(10000) %>%
# filter out extreme values
filter(across(BLUE:SWIR2, ~ . > 0),
across(BLUE:SWIR2, ~ . < 10000)) %>%
mutate(date = as.Date(str_sub(scene, 1, 9),format = "%Y%m%d"),
month = lubridate::month(date),
year = lubridate::year(date),
month_year = paste0(month,"_",year),
doy = lubridate::yday(date),
sensor = str_sub(scene, -5, -1),
SWIR_ratio = SWIR2/SWIR1,
NDVI = ((NIR-RED)/(NIR+RED)),
NDTI = ((SWIR1-SWIR2)/(SWIR1+SWIR2))) # source: https://www.mdpi.com/2072-4292/10/10/1657/htm## Warning: Using `across()` in `filter()` is deprecated, use `if_any()` or
## `if_all()`.
## Warning: Using `across()` in `filter()` is deprecated, use `if_any()` or
## `if_all()`.
ggplot(fs_LND, aes(doy, NDVI, color=year, group=year)) +
geom_smooth() +
scale_colour_viridis_c(option = "D") +
theme_minimal()## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Here it can be seen that over time, average NDVI maximum values tend to increase over time, particularly early in the year before the growing season. Over such a long time frame it is possible though that this chance could be attributed to more irrigation, climate change, or LULC rather been strictly being resultant of deviations in sensor spec?
ggplot(na.omit(fs_LND), aes(doy, NDTI, color=year, group=year)) +
geom_smooth() +
scale_colour_viridis_c(option = "D") +
theme_minimal()## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Similar story here, with NDTI decreasing over time
fs_LND_long <- fs_LND %>%
pivot_longer(cols=c("BLUE":"SWIR2"),names_to = "wavelength", values_to = "reflectance") %>%
as.data.frame() %>%
mutate(wavelength_num = case_when(wavelength == "BLUE" ~ 482,
wavelength == "GREEN" ~ 562,
wavelength == "RED" ~ 655,
wavelength == "NIR" ~ 865,
wavelength == "SWIR1" ~ 1610,
wavelength == "SWIR2" ~ 2200))ggplot(fs_LND_long, aes(wavelength_num, reflectance, color=year, group=year)) +
# geom_smooth(formula = y ~ s(x, bs = "cs", k=6)) +
stat_summary(fun=mean, geom="line", size = 1) + # draw a mean line in the data
scale_colour_viridis_c(option = "D") +
theme_minimal()When plotting a simple mean of spectra across years it can be seen that more recent years dont defaco have higher reflectance values across the entire electromagnetic spectrum
ggplot(fs_LND_long, aes(wavelength, reflectance, color=sensor)) +
# geom_jitter() +
geom_boxplot() +
scale_colour_viridis_d(option = "D") +
theme_minimal()Looks like lots of extreme values (even post QAI filter), but when looking at the next density plot section, you can that relatively speaking, these extreme outlying values are exceedinly rare and marginal.
# facet wrap by wavelength
ggplot(fs_LND_long, aes(reflectance, color=sensor, fill=sensor)) +
geom_density(alpha = 0.05) +
#geom_jitter() +
scale_colour_viridis_d(option = "D") +
scale_fill_viridis_d(option = "D") +
scale_x_continuous(expand = c(0, 0)) +
#scale_y_continuous(expand = c(0, 0), limits = c(0, 0.02)) +
theme_minimal() +
guides(col = guide_legend(nrow = 3))+
theme(legend.position = "bottom") +
facet_wrap(~wavelength)# facet wrap by sensor
ggplot(fs_LND_long, aes(reflectance, color=wavelength, color=wavelength)) +
geom_density(alpha = 0.05) +
#geom_jitter() +
scale_colour_viridis_d(option = "D") +
scale_fill_viridis_d(option = "D") +
scale_x_continuous(expand = c(0, 0)) +
#scale_y_continuous(expand = c(0, 0), limits = c(0, 0.02)) +
theme_minimal() +
guides(col = guide_legend(nrow = 3))+
theme(legend.position = "bottom") +
facet_wrap(~sensor)## Warning: Duplicated aesthetics after name standardisation: colour
ggplot(fs_LND_long, aes(x = reflectance, y = sensor, fill = stat(x))) +
geom_density_ridges_gradient(scale = 1.2, rel_min_height = 0.01) +
scale_fill_viridis_c(name = "reflectance", option = "D") +
scale_x_continuous(expand = c(0, 0), limits = c(0, 15000)) +
theme_minimal() +
facet_wrap(~wavelength)## Picking joint bandwidth of 21.2
## Picking joint bandwidth of 23.4
## Picking joint bandwidth of 116
## Picking joint bandwidth of 34.6
## Picking joint bandwidth of 55.5
## Picking joint bandwidth of 43.4
ggplot(fs_LND_long, aes(x = reflectance, y = wavelength, fill = stat(x))) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis_c(name = "reflectance", option = "D") +
scale_x_continuous(expand = c(0, 0), limits = c(0, 15000)) +
theme_minimal() +
facet_wrap(~sensor)## Picking joint bandwidth of 65.6
## Picking joint bandwidth of 33.9
## Picking joint bandwidth of 33.8
## Picking joint bandwidth of 39.3
## Picking joint bandwidth of 72.2
reference_spectra <- read.csv("data/feature_space/sli_gen_dark_soils_0p4.csv",
encoding = "UTF-8") %>%
mutate(SWIR_ratio = SWIR2/SWIR1,
NDVI = ((NIR-RED)/(NIR+RED)))
ggplot(reference_spectra, aes(NDVI, SWIR_ratio, color = cover)) +
geom_point() +
scale_colour_viridis_d(option = "D") +
theme_minimal() # No dissagregation
ggplot(fs_LND, aes(NDVI, SWIR_ratio)) +
geom_bin2d(bins = 300) +
geom_point(data=reference_spectra, aes(NDVI, SWIR_ratio), color ="red") +
scale_fill_continuous(type = "viridis") +
theme_minimal() +
scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2))## Warning: Removed 55 rows containing non-finite values (stat_bin2d).
Here we can see that there are two dense zones in the spectral feature spaces. The smaller and fainter zone is predominately populated by landsat 7 pixels (clearly seen in the following plot)
ggplot(fs_LND, aes(NDVI, SWIR_ratio)) +
geom_bin2d(bins = 200) +
geom_point(data=reference_spectra, aes(NDVI, SWIR_ratio), color ="red") +
scale_fill_continuous(type = "viridis") +
theme_minimal() +
scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2)) +
facet_wrap(~sensor)## Warning: Removed 55 rows containing non-finite values (stat_bin2d).
Here we can see that the spectral library fits particularly well to Landsat 8 (probably do to the fact that is was created using lansat 8 pixels?).
ggplot(fs_LND, aes(NDVI, SWIR_ratio)) +
geom_bin2d(bins = 100) +
geom_point(data=reference_spectra, aes(NDVI, SWIR_ratio), color ="red") +
scale_fill_continuous(type = "viridis") +
theme_minimal() +
scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2)) +
facet_wrap(~month)## Warning: Removed 55 rows containing non-finite values (stat_bin2d).
## Warning: Removed 1 rows containing missing values (geom_tile).
Here we can also see that across all sensors, the spectral library seems to triangulate the data points better during growing season months.
Displayed with only point per reference class to improve interpretability
ggplot(fs_LND, aes(NDVI, SWIR_ratio)) +
geom_bin2d(bins = 100) +
geom_point(data=reference_spectra[c(1,3,25),], aes(NDVI, SWIR_ratio), color ="red") +
scale_fill_continuous(type = "viridis") +
theme_minimal() +
scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2)) +
facet_wrap(~year)## Warning: Removed 55 rows containing non-finite values (stat_bin2d).
## Warning: Removed 1 rows containing missing values (geom_tile).
ggplot(fs_LND, aes(NDTI, SWIR_ratio)) +
geom_point() +
# geom_bin2d(bins = 10) +
#geom_point(data=reference_spectra, aes(NDVI, SWIR_ratio), color ="red") +
scale_fill_continuous(type = "viridis") +
theme_minimal()# +# scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
# scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2)) NDTI is the inverse of the SWIR ratio. Therefor the conventional feature space comparison inst really appropriate.
# old computationally exorbinant method of and plotting density plots
# # Get density of points in 2 dimensions.
# # @param x A numeric vector.
# # @param y A numeric vector.
# # @param n Create a square n by n grid to compute density.
# # @return The density within each square.
# get_density <- function(x, y, ...) {
# dens <- MASS::kde2d(x, y, ...)
# ix <- findInterval(x, dens$x)
# iy <- findInterval(y, dens$y)
# ii <- cbind(ix, iy)
# return(dens$z[ii])
# }
#
# fs_density <- as.data.frame(fs_LND) %>%
# filter(SWIR_ratio != "Inf") %>%
# mutate(density = get_density((.)$NDVI, (.)$SWIR_ratio, n = 500))
# ggplot(fs_density, aes(NDVI, SWIR_ratio, color = density)) +
# geom_point() +
# geom_point(data=reference_spectra, aes(NDVI, SWIR_ratio), color ="red") +
# scale_colour_viridis_c(option = "D") +
# theme_minimal() +
# scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
# scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2)) +
# facet_wrap(~sensor)fs_LND |>
group_by( year, sensor) %>%
summarise(n_observation = n()) %>%
ggplot(., aes(year, n_observation, color=sensor)) +
geom_line() +
scale_fill_continuous(type = "viridis") +
scale_colour_viridis_d(option = "D") +
theme_minimal() ## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
fs_LND_obs_overview <- fs_LND %>%
group_by(month, year, sensor) %>%
summarise(n_observation = n()) %>%
mutate(timestamp = zoo::as.yearmon(paste0(month,"_", year), format="%m_%Y")) ## `summarise()` has grouped output by 'month', 'year'. You can override using the
## `.groups` argument.
ggplot(fs_LND_obs_overview, aes(timestamp, n_observation, color=sensor)) +
geom_line(size=1) +
scale_fill_continuous(type = "viridis") +
scale_colour_viridis_d(option = "D") +
scale_x_continuous(breaks=seq(1984,2022,2)) +
theme_minimal()ggplot(fs_LND_obs_overview, aes(x=month, y=n_observation, group=month,fill=sensor, color=sensor)) +
# geom_boxplot(alpha=0.3, notch = T) +
geom_violin(alpha=0.3) +
geom_jitter(alpha=0.3) +
#geom_density_ridges_gradient(scale = 1.2, rel_min_height = 0.01) +
scale_colour_viridis_d(option = "D") +
scale_fill_viridis_d(option = "D") +
theme_minimal() +
scale_x_continuous(breaks=seq(1,12,1)) +
scale_y_continuous(limits = c(0, 2000)) +
facet_wrap(~sensor)## Warning: Removed 6 rows containing non-finite values (stat_ydensity).
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Warning in max(data$density): no non-missing arguments to max; returning -Inf
## Warning: Computation failed in `stat_ydensity()`:
## replacement has 1 row, data has 0
## Warning: Removed 6 rows containing missing values (geom_point).
fs_LND_loose <- fs_LND_long %>%
filter(sensor %in% c("LND07", "LND08")) %>%
mutate(obs_time_place = paste0(plyr::round_any(doy, 2, f = floor),"_", year,"_",POINT_ID)) %>%
group_by(obs_time_place) %>%
filter(n() > 6)Total number of observations that match the loose condition of being within ten days of each other: 3.0466^{4}
Days of year with available observations: 2, 3, 6, 7, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23, 30, 31, 36, 37, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 332, 333, 334, 335, 338, 339, 340, 341, 342, 343, 346, 347, 350, 351, 356, 357, 362, 363
for (i in 1:4) {
scens <- unique(fs_LND_loose$obs_time_place)[c((((i-1)*20)+1):((i)*20))]
fs_LND_loose_subset <- fs_LND_loose %>%
filter(obs_time_place %in% scens)
print(ggplot( fs_LND_loose_subset, aes(wavelength_num, reflectance, color=sensor, fill=sensor)) +
# geom_density(alpha = 0.05) +
geom_line() +
# scale_colour_viridis_d(option = "D") +
#scale_fill_viridis_d(option = "D") +
scale_x_continuous(expand = c(0, 0)) +
#scale_y_continuous(expand = c(0, 0), limits = c(0, 0.02)) +
theme_minimal() +
guides(col = guide_legend(nrow = 3))+
theme(legend.position = "bottom",
axis.text.y = element_text(angle = 45),
strip.text.y = element_text(size = 8, angle = 330)) +
facet_wrap( ~obs_time_place) )
}Summary statistics for sensor variance between all sensors
fs_LND_sensor_summary <- fs_LND_long %>%
mutate(obs_time_place = paste0( round(doy, digits = -1),"_", year,"_",POINT_ID)) %>%
group_by(obs_time_place) %>%
filter(n() > 6) %>%
group_by(obs_time_place, sensor) %>%
summarise(SD = sd(reflectance))## `summarise()` has grouped output by 'obs_time_place'. You can override using
## the `.groups` argument.
ggplot(fs_LND_sensor_summary, aes(x = SD, y = sensor, fill = stat(x))) +
geom_density_ridges_gradient(scale = 1, rel_min_height = 0.01) +
scale_fill_viridis_c(name = "SD", option = "D") +
# scale_x_continuous(expand = c(0, 0), limits = c(0, 15000)) +
theme_minimal()## Picking joint bandwidth of 48.2
#
# fs_LND_loose %>%
# filter(sensor %in% c("LND07", "LND07")) %>%
# mutate(LND05 = case_when(sensor=="LND05" ~ reflectance),
# LND07 = case_when(sensor=="LND07" ~ reflectance))
#
#
# ggplot(data = fs_LND_loose) +
# geom_bin2d(aes(fs_LND_loose$reflectance[fs_LND_loose$sensor=="LND07"],
# fs_LND_loose$reflectance[fs_LND_loose$sensor=="LND08"]), bins = 200) +
# scale_colour_viridis_d(option = "D") +
# theme_minimal() +
# facet_wrap(~wavelength)
#
#
# geom_bin2d(bins = 200) +
# scale_fill_continuous(type = "viridis") +
# theme_minimal() +
# scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
# scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2)) +
# facet_wrap(~sensor)doy agnostic plots for single locations
fs_LND_7_8 <- fs_LND_long %>%
filter(sensor %in% c("LND07", "LND08"),
year == 2020,
doy %in% c(93:120),
POINT_ID %in% c(40763060:40643065))
fs_LND_7_8 <- fs_LND_long %>%
filter(sensor %in% c("LND07", "LND08"),
year == 2020,
doy %in% c(96:319),
POINT_ID %in% c(40763060:40764068))
# facet wrap by wavelength
ggplot(fs_LND_7_8, aes(reflectance, color=wavelength, fill=wavelength)) +
geom_density(alpha = 0.05) +
#geom_jitter() +
scale_colour_viridis_d(option = "D") +
scale_fill_viridis_d(option = "D") +
scale_x_continuous(expand = c(0, 0)) +
#scale_y_continuous(expand = c(0, 0), limits = c(0, 0.02)) +
theme_minimal() +
guides(col = guide_legend(nrow = 3))+
theme(legend.position = "bottom") +
facet_grid(rows = vars(POINT_ID), cols = vars(sensor))fs_LND_exact <- fs_LND_long %>%
mutate(obs_time_place = paste0(doy,"_", year,"_",POINT_ID)) %>%
group_by(obs_time_place) %>%
filter(n() > 6)
unique(fs_LND_exact$sensor)## [1] "LND07" "LND09"
# facet wrap by wavelength
ggplot(fs_LND_exact[c(8000:15000),], aes(reflectance, color=wavelength, fill=wavelength)) +
geom_density(alpha = 0.05) +
#geom_jitter() +
scale_colour_viridis_d(option = "D") +
scale_fill_viridis_d(option = "D") +
scale_x_continuous(expand = c(0, 0)) +
#scale_y_continuous(expand = c(0, 0), limits = c(0, 0.02)) +
theme_minimal() +
guides(col = guide_legend(nrow = 3))+
theme(legend.position = "bottom") +
facet_grid(rows = vars(doy), cols = vars(sensor)) Only observation for landsat 7 and 9 exactly match. Visually the correspondence between the spectra is pretty decent though.